rm(list = ls())

library(microsynth)
library(xtable)
library(tidyverse)

rank_fun <- function(x){
  su = sort(unique(x))
  for (i in 1:length(su)) x[x==su[i]] = i
  return(x)
}

## Load county-level data 1990 -- 2021

dat <- readRDS('data/nrw_rlp_county_1990_2021_clean.rds') %>%
  mutate(period = rank_fun(year)) %>%
  mutate(greens_zweit = greens_zweit*100)

## Drop weakly treated areas 

dat <- dat %>%
  filter(treat_categ != 'schwach')

## Define binary treatment 

dat <- dat %>%
  mutate(treat_heavy = ifelse(treat_categ %in% c('schwer', 'sehr schwer'), 1, 0))  

## Drop units for which we don't observe complete panel 

miss_county <- dat %>% 
  group_by(county_id) %>% 
  count() %>%
  filter(n < 9) %>%
  pull(county_id)

dat <- dat %>% 
  filter(!county_id %in% miss_county)

table(dat$year, dat$treat_heavy)

## Estimation 

sc_out <- microsynth(as.data.frame(dat), 
                     idvar= 'county_id', 
                     timevar = 'period', 
                     intvar = 'treat_heavy', 
                     result.var = c('greens_zweit'), 
                     start.pre = 1, 
                     end.pre = 8, 
                     perm = 5000,
                     check.feas = TRUE, 
                     use.backup = TRUE,
                     jack = T, 
                     test = "twosided",
                     n.cores = 1)

plot_dat <- sc_out$Plot.Stats 

## Weighted mean in treated/control groups

synth_treated <- plot_dat$Treatment %>%
  t() %>%
  as_tibble() %>%
  mutate(group = 'Heavily affected counties',
         period = unique(dat$year))

synth_control <- plot_dat$Control %>%
  t() %>%
  as_tibble() %>%
  mutate(group = 'Synthetic control',
         period = unique(dat$year))

## Combine 

plot_no_se <- bind_rows(synth_treated, synth_control) %>%
  mutate(greens_zweit = greens_zweit / 9) ## scale by size of treated group

## Plot this 

p1 <- ggplot(plot_no_se, aes(x = as.numeric(period), y = greens_zweit, 
                             group = group, col = group,
                             fill = group)) + 
  geom_line() + 
  geom_point(shape = 21) + 
  theme_bw() + 
  labs(x = '',
       y = 'Green party vote share (in p.p.)') + 
  theme(legend.title = element_blank(),
        legend.position = 'bottom') + 
  scale_color_manual(values = c('black', 'gray')) + 
  scale_y_continuous(limits = c(0, 14), breaks = seq(0, 14, 2)) + 
  scale_fill_manual(values = c('black', 'gray')) + 
  geom_vline(xintercept = 2020, linetype = 'dotted') + 
  scale_x_continuous(breaks = unique(as.numeric(dat$year)))

p1






